packages <- c("CIMseq", "CIMseq.data", "tidyverse", "circlize", "printr")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)
##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
load('../../MGA.analysis_enge20/data/pal.rda')
#there are 5 cells that were classified as colon but sorted as SI. These have to
#be removed manually
c <- getData(cObjSng, "classification")
s <- names(c[c %in% c("4", "9")])
i <- which(colnames(getData(cObjSng, "counts")) %in% s)
cObjSng <- CIMseqSinglets(
getData(cObjSng, "counts")[, -i],
getData(cObjSng, "counts.ercc")[, -i],
getData(cObjSng, "dim.red")[-i, ],
getData(cObjSng, "classification")[-i]
)
p <- plotUnsupervisedClass(cObjSng, cObjMul, pal)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.classes.pdf',
device = cairo_pdf,
height = 180,
width = 180,
units = "mm"
)
p <- plotUnsupervisedMarkers(
cObjSng, cObjMul,
c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.markers.pdf',
device = cairo_pdf,
height = 180,
width = 180,
units = "mm"
)
p <- plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Mki67"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.Mki67.pdf',
device = cairo_pdf,
height = 180,
width = 180,
units = "mm"
)
adj <- adjustFractions(cObjSng, cObjMul, sObj)
as.data.frame(table(apply(adj, 1, sum)))
| Var1 | Freq |
|---|---|
| 0 | 142 |
| 1 | 571 |
| 2 | 588 |
| 3 | 268 |
| 4 | 94 |
| 5 | 32 |
| 6 | 5 |
| 7 | 3 |
tibble(fractions = c(fractions)) %>%
ggplot() +
geom_histogram(aes(fractions), binwidth = 0.01) +
theme_bw()
tibble(
nCellTypes = apply(adj, 1, sum),
cost = getData(sObj, "costs")
) %>%
ggplot() +
geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
theme_bw()
tibble(
sample = names(getData(sObj, "costs")),
cost = unname(getData(sObj, "costs"))
) %>%
inner_join(
select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber),
by = "sample"
) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
) +
theme_bw()
ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections <- nConnections[match(ercc$sample, names(nConnections))]
tibble(
detectedConnections = round(nConnections),
estimatedCellNumber = round(ercc$estimatedCellNumber)
) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(ercc$estimatedCellNumber))
) +
scale_y_continuous(
name = "Detected cell number",
breaks = 0:max(round(nConnections))
) +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.counts = colSums(getData(cObjMul, "counts"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total counts") +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.ercc = colSums(getData(cObjMul, "counts.ercc"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total ERCC counts") +
theme_bw()
singlets <- c(table(getData(cObjSng, "classification")))
singlets <- singlets / sum(singlets)
deconv <- colSums(adjustFractions(cObjSng, cObjMul, sObj))
deconv <- deconv[match(names(singlets), names(deconv))]
deconv <- deconv / sum(deconv)
if(!identical(names(singlets), names(deconv))) stop("name mismatch")
p <- tibble(
class = names(singlets),
singlet.freq = singlets,
multiplet.freq = deconv
) %>%
ggplot() +
geom_point(aes(singlet.freq, multiplet.freq, colour = class), size = 3) +
scale_colour_manual(values = pal[order(names(pal))]) +
xlim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
ylim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
geom_abline(slope = 1, intercept = 0, lty = 3, colour = "grey") +
labs(x = "Singlet relative frequency", y = "Multiplet relative frequency") +
guides(colour = guide_legend(title = "Cell Type")) +
theme_bw()
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.sngMulRelFreq.pdf',
device = cairo_pdf,
height = 180,
width = 180,
units = "mm"
)
plotSwarmCircos(
sObj, cObjSng, cObjMul, classOrder = cOrder, classColour = pal[cOrder],
h.ratio = 0.85
)
## Joining, by = "class"
Only detected duplicates, triplicates, and quadruplicates.
ERCC estimated cell number set to max 4.
Weight cutoff = 10. Alpha = 1e-3.
# adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
# samples <- rownames(adj)
# rs <- rowSums(adj)
# keep <- rs == 2 | rs == 3 | rs == 4
plotSwarmCircos(
sObj, cObjSng, cObjMul, weightCut = 10,
classOrder = cOrder, theoretical.max = 4, classColour = pal[cOrder],
h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"
pdf('../figures/MGA.enge20.circos.pdf', width = 9.5, height = 9.5, onefile = FALSE)
plotSwarmCircos(
sObj, cObjSng, cObjMul, weightCut = 10,
classOrder = cOrder, theoretical.max = 4, classColour = pal[cOrder],
h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"
dev.off()
## quartz_off_screen
## 2
Calculate the probability of observing Lgr5 expression when Plet1 is or is not expressed in Muc2 high expressing multiplets.
p <- getData(cObjMul, "counts.cpm") %>%
.[c("Plet1", "Lgr5", "Muc2"), ] %>%
t() %>%
matrix_to_tibble("sample") %>%
filter(Muc2 > 3000) %>% #include only Muc2 high
mutate(
express.plet1 = if_else(Plet1 > 0, TRUE, FALSE),
express.lgr5 = if_else(Lgr5 > 0, TRUE, FALSE)
) %>%
group_by(express.plet1, express.lgr5) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(express.plet1) %>%
mutate(total = sum(n)) %>%
ungroup() %>%
mutate(lgr5.prob = n / total) %>%
filter((express.plet1 == TRUE & express.lgr5 == TRUE) | (express.plet1 == FALSE & express.lgr5 == TRUE)) %>%
ggplot() +
geom_bar(aes(express.plet1, lgr5.prob), stat = "identity", position = position_dodge(width = 1)) +
labs(x = "Plet1 expressed", y = "Lgr5 probability") +
ggthemes::theme_few()
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.Lgr5prob.pdf',
device = cairo_pdf,
height = 180,
width = 90,
units = "mm"
)
pdata <- adjustFractions(cObjSng, cObjMul, sObj, theoretical.max = 4) %>%
matrix_to_tibble("sample") %>%
filter(C.Goblet.proximal == 1) %>%
select(-C.Goblet.proximal) %>%
gather(class, binary, -sample) %>%
group_by(sample) %>%
summarize(others = paste(class[binary == 1], collapse = ", ")) %>%
mutate(others = map(others, ~str_split(.x, ", ")[[1]])) %>%
unnest() %>%
filter(others != "") %>%
group_by(others) %>%
summarize(prob = n() / nrow(.)) %>%
rename(class = others) %>%
full_join(tibble(class = unique(getData(cObjSng, "classification")))) %>%
filter(class != "C.Goblet.proximal") %>%
replace_na(list(prob = 0))
## Joining, by = "class"
p <- pdata %>%
ggplot() +
geom_bar(aes(class, prob), stat = "identity", position = position_dodge(width = 1)) +
geom_text(aes(class, prob + 0.01, label = round(prob, digits = 3))) +
theme_bw() +
labs(y = "Probability") +
theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 90))
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20.PletIntProb.pdf',
device = cairo_pdf,
height = 240,
width = 240,
units = "mm"
)
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] printr_0.1 circlize_0.4.6 forcats_0.4.0
## [4] stringr_1.4.0 dplyr_0.8.3 purrr_0.3.2
## [7] readr_1.3.1 tidyr_0.8.3 tibble_2.1.3
## [10] ggplot2_3.2.0 tidyverse_1.2.1 CIMseq.data_0.0.1.4
## [13] CIMseq_0.3.0.3
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-140 matrixStats_0.54.0 lubridate_1.7.4.9000
## [4] RColorBrewer_1.1-2 httr_1.4.0 gmodels_2.18.1
## [7] tools_3.5.3 backports_1.1.4 R6_2.4.0
## [10] lazyeval_0.2.2 BiocGenerics_0.28.0 colorspace_1.4-1
## [13] withr_2.1.2 tidyselect_0.2.5 gridExtra_2.3
## [16] compiler_3.5.3 cli_1.1.0 rvest_0.3.4
## [19] xml2_1.2.0 labeling_0.3 scales_1.0.0
## [22] digest_0.6.20 rmarkdown_1.13 pkgconfig_2.0.2
## [25] htmltools_0.3.6 highr_0.8 rlang_0.4.0.9000
## [28] GlobalOptions_0.1.0 ggthemes_4.2.0 readxl_1.3.1
## [31] rstudioapi_0.10 shape_1.4.4 farver_1.1.0
## [34] generics_0.0.2 jsonlite_1.6 gtools_3.8.1
## [37] magrittr_1.5 Rcpp_1.0.2 munsell_0.5.0
## [40] S4Vectors_0.20.1 viridis_0.5.1 stringi_1.4.3
## [43] EngeMetadata_0.1.2 yaml_2.2.0 ggraph_1.0.2
## [46] MASS_7.3-51.4 Rtsne_0.15 plyr_1.8.4
## [49] grid_3.5.3 parallel_3.5.3 gdata_2.18.0
## [52] listenv_0.7.0 ggrepel_0.8.1 crayon_1.3.4
## [55] lattice_0.20-38 haven_2.1.0 hms_0.5.0
## [58] zeallot_0.1.0 knitr_1.23 pillar_1.4.2
## [61] igraph_1.2.4.1 pso_1.0.3 future.apply_1.3.0
## [64] codetools_0.2-16 stats4_3.5.3 glue_1.3.1
## [67] evaluate_0.14 modelr_0.1.4 vctrs_0.2.0
## [70] tweenr_1.0.1 cellranger_1.1.0 gtable_0.3.0
## [73] RANN_2.6.1 polyclip_1.10-0 future_1.14.0
## [76] assertthat_0.2.1 xfun_0.8 gridBase_0.4-7
## [79] ggforce_0.2.2 broom_0.5.2 tidygraph_1.1.2
## [82] googledrive_0.1.3 viridisLite_0.3.0 globals_0.12.4